\begin{proof}[\textbf{Solution~\ref{ex:information_theory:entropy_1_character_strings_language}}]
The entropy of the language is 
$$
\begin{array}{l}
\displaystyle
\frac{1}{3} \log_2(3) + \frac{1}{12} \log_2(12) 
                      + \frac{1}{4} \log_2(4) + \frac{1}{3} \log_2(3) \\ \\
\displaystyle
= \frac{2}{3} \log_2(3) + \frac{1}{12} (2 + \log_2(3)) + \frac{1}{2} 
= \frac{2}{3} + \frac{3}{4} \log_2(3), 
\end{array}
$$
which is approximately 1.855.
\end{proof}

\begin{proof}[\textbf{Solution~\ref{ex:information_theory:entropy_language_2_character_srings}}]
By rearranging the sum
$$
\begin{array}{l}
\displaystyle
-\sum_{x\in\rX} \sum_{y\in\rX} P(xy) \log_2(P(xy)) \\
\displaystyle
 = -\sum_{x\in\rX} \sum_{y\in\rX} P(x)P(y) 
   \big( \log_2 P(x) + \log_2 P(y) \big)
\end{array}
$$
one finds the entropy to be double that of Exercise~1, or about 3.711.
This is consistent with the interpretation of the entropy as the length 
of a random element of a language in some theoretically optimal encoding.
\end{proof}

\begin{proof}[\textbf{Solution~\ref{ex:information_theory:strings_length_2}}]
The $1$-character frequencies can be defined as the average of the 
character frequencies in the 1st and 2nd positions, but these turn out 
to be the same for each character, and agree with the frequencies of 
Exercise 1.  
\end{proof}

\begin{proof}[\textbf{Solution~\ref{ex:information_theory:entropy_each_language}}]
The entropy of the language of Exercise 3 is approximately 3.633, 
compared to an entropy of about 3.711 for that of Exercise 2.  

The language of Exercise~2 is the most random space with given $1$-character 
frequencies.  The lower entropy in Exercise~3 could have been predicted 
since the probabilities agree with the $1$-character frequencies, while 
additional structure (less uncertainty) is built into the $2$-character 
probabilities, since in general $P(\mathtt{XY}) \ne P(\mathtt{YX})$.
\end{proof}

\begin{proof}[\textbf{Solution~\ref{ex:information_theory:infinite_language}}]
One must verify the equality 
$$
\sum_{n=1}^\infty \frac{1}{2^n} \log_2(2^n) 
    = \sum_{n=1}^\infty \frac{n}{2^n} = 2.
$$
We do this by first verifying the equality 
$$
\sum_{n=0}^\infty \frac{1}{2^n} + 
\sum_{n=1}^\infty \frac{n}{2^n} = 2 \sum_{n=1}^\infty \frac{n}{2^n},
$$
together with the standard identity $\sum_{n=0}^\infty 1/2^n = 2$.
\end{proof}

